/******************************************************************************
Author: Akshay Dixit
Date created: April 18, 2018
Date last modified: November 10, 2020
File Name: 2_in.do
Project: T4D Indonesia 
Purpose: Analysis of primary outcomes
******************************************************************************/

/* LIST OF PRIMARY OUTCOMES & THE ASSOCIATED VARIABLES
	
	Note that the following codes apply:
		V. Other, specify _________
		X. REFUSED
		Y. DON'T KNOW
		
		95. Other, specify _________
		97. REFUSED
		98. DON'T KNOW
		99. MISSING

RQ.1 Uptake of health services
	
	1. Delivery with a skilled attendant - pf16
	2. Delivery at a health facility - sk11A
	3. Postpartum & Postnatal care - pf40 pf40a
		
RQ.2 Content of health services: Combine into an index
	
	1. Delivery content of care - pf17 pf18 pf19 pf20 pf21 pf22
	2. Postpartum content of care (mother) - 
		a. pf41aA pf41bA pf41cA
		b. pf41aB pf41bB pf41cB
		c. pf41aD pf41bD pf41cD
		d. pf41aE pf41bE pf41cE
		e. pf41aF pf41bF pf41cF
		f. pf41aG pf41bG pf41cG
		g. pf41aH pf41bH pf41cH
		h. pf41aI pf41bI pf41cI
		i. pf41aJ pf41bJ pf41cJ
	3. Postnatal content of care (newborn) - 
		a. pf42aA pf42bA pf42cA
		b. pf42aB pf42bB pf42cB
		c. pf42aC pf42bC pf42cC
		d. pf42aD pf42bD pf42cD
		e. pf42aE pf42bE pf42cE
		f. pf42aF pf42bF pf42cF
		g. pf42aG pf42bG pf42cG
		h. pf42aH pf42bH pf42cH
		i. pf42aI pf42bI pf42cI

RQ.3 Health outcomes

	1. Weight - pf14
	2. Height - pf15
		Age - sk02_mth sk02_yr

RQ.4 Empowerment

	1. Participation - ks03 ks07 ks09_a ks09_b ks09_c ks09_d ks09_e ks09_f ks09_g
	2. Perceptions of empowerment - ks11, adjusted using ks12a ks12b ks12c ks13	

*/

clear all
set more off

cd "$data"

********************************************************************************

* PREPARING PRIMARY OUTCOMES - RQ.1

u "household_data.dta", clear

desc pf16 sk11A pf40 pf40a
tab pf16	//No "don't know"s, No missing values
tab sk11A	//No "don't know"s, No missing values
tab pf40	//2 "don't know"s, No missing values
tab pf40a	//2 "don't know"s, No missing values

***Delivery with a skilled attendant***

g skilled_provider = substr(pf16,1,1)
g skilled_provider_birth = (skilled_provider == "A" | skilled_provider == "B" | skilled_provider == "C")
drop skilled_provider

***Delivery at a facility***

g facility_birth = (sk11A == 1 | sk11A == 2 | sk11A == 3 | sk11A == 4 | sk11A == 5 | sk11A == 6 | sk11A == 7 | sk11A == 8 | sk11A == 9)

***Postpartum & Postnatal care***
	
	*Code Don't Know/Refused as missing
	foreach var in pf40 pf40a {
		replace `var' = . if (`var' == 98 | `var' == 97 | `var' == 99)
	}

g postpartum_care = (pf40 == 1)
replace postpartum_care = . if pf40 == .

g postnatal_care = (pf40a == 1)
replace postnatal_care = . if pf40a == .

g postpartum_postnatal_care = (pf40 == 1 & pf40a == 1)
replace postpartum_postnatal_care = . if (pf40 == . | pf40a == .) 
		
********************************************************************************

* PREPARING PRIMARY OUTCOMES - RQ.2

***Create/clean the necessary variables***

	*Delivery content of care
desc pf17 pf18 pf19 pf20 pf21 pf22
local dcc pf17 pf18 pf19 pf20 pf21 pf22
foreach var of local dcc {
		replace `var' = . if (`var' == 98 | `var' == 97 | `var' == 99) 		//Code Don't Know/Refused as missing
		replace `var' = 0 if `var' == 3
		tab `var'
	}
replace pf19 = 1 if pf19 == . & facility_birth == 1
	//For births at a facility, it is assumed that PF 19 = Yes

	*Postpartum content of care
desc pf41aA pf41aB pf41aC pf41aD pf41aE pf41aF pf41aG pf41aH pf41aI pf41aJ 
local ppcc pf41aA pf41aB pf41aD pf41aE pf41aF pf41aG pf41aH pf41aI pf41aJ 
foreach var of local ppcc {
		replace `var' = . if (`var' == 98 | `var' == 97 | `var' == 99 | `var' == 96)
		replace `var' = 0 if `var' == 3
		tab `var'
	}
	
	*Postnatal content of care
desc pf42a*
local pncc pf42aA pf42aB pf42aC pf42aD pf42aE pf42aF pf42aG pf42aH pf42aI
foreach var of local pncc {
		replace `var' = . if (`var' == 98 | `var' == 97 | `var' == 99)
		replace `var' = 0 if `var' == 3
		tab `var'
	}
	
*****************************************************
	
***Impute missing values with the treatment assignment group mean***

	*Delivery content of care
local dcc pf17 pf18 pf19 pf20 pf21 pf22
	foreach var of local dcc {
		bysort treatment: egen mean_`var' = mean(`var')
		replace `var' = mean_`var' if `var' == .
		drop mean_`var'
	}
	
	*Postpartum content of care
local ppcc pf41aA pf41aB pf41aD pf41aE pf41aF pf41aG pf41aH pf41aI pf41aJ
	foreach var of local ppcc {
		bysort treatment: egen mean_`var' = mean(`var')
		replace `var' = mean_`var' if `var' == .
		drop mean_`var'
	}

	*Postnatal content of care
local pncc pf42aA pf42aB pf42aC pf42aD pf42aE pf42aF pf42aG pf42aH pf42aI
	foreach var of local pncc {
		bysort treatment: egen mean_`var' = mean(`var')
		replace `var' = mean_`var' if `var' == .
		*replace `var' = . if sk03 == 1		//This line, if executed, excludes still-births from the analysis
		drop mean_`var'
	}

*****************************************************
	
***Individual content of care outcomes***
egen delivery_content = rowtotal(pf17 pf18 pf19 pf20 pf21 pf22), missing
egen postpartum_content = rowtotal(pf41aA pf41aB pf41aD pf41aE pf41aF pf41aG pf41aH pf41aI pf41aJ), missing
egen postnatal_content = rowtotal(pf42aA pf42aB pf42aC pf42aD pf42aE pf42aF pf42aG pf42aH pf42aI), missing
*replace postnatal_content = . if sk03 == 1		//This line, if executed, excludes still-births from the analysis

*****************************************************

***Content of care index***	

	*1. All variables are already oriented so that higher values represent "better" outcomes
	
	*2. Standardize each outcome - using the mean and s.d. of the control group
	local content delivery_content postpartum_content postnatal_content
	foreach var of local content {
		sum `var' if treatment == 0
		scalar mean_`var' = r(mean)
		scalar sd_`var' = r(sd)
		g z_`var' = (`var' - mean_`var')/sd_`var'
	}
	
	*3. Missing value imputation already done
	
	*4. Compile summary index
	egen z_content = rowmean(z_delivery_content z_postpartum_content z_postnatal_content)
	sum z_content
	
********************************************************************************

* PREPARING PRIMARY OUTCOMES - RQ.3

**Examining data on weight**
desc pf14* pf15* sk02_mth sk02_yr
tab pf14_a 
tab sk03 if pf14_a == .
tab sk07 if pf14_a == .
	/* 136 missing values (out of 5997).
	   Of these 136, 44 were stillbirths.
	   Of the remaining 92, 79 are dead.
	   There are 13 missing values. Reasons specified, in variable pf14_x_ot.  
	*/
tab pf14_c
g weight_diff_ab = abs(pf14_a - pf14_b)
tab weight_diff_ab
count if weight_diff_ab >= 0.2 & weight_diff_ab != .
	/* The number of non-missing values in pf14_c is the exact same as the number
	   of cases in which the absolute difference between pf14_a and pf14_b is 
	   more than 0.1 kg. Hence, the 3rd measurement has been taken in all cases where
	   the difference between the first two is more than 0.1
	*/
	
**Examining data on height**	
tab pf15_a
tab sk03 if pf15_a == .
tab sk07 if pf15_a == . 
	/* 137 missing values (out of 5997).
	   Of these 137, 44 were stillbirths.
	   Of the remaining 93, 79 are dead.
	   There are 14 missing values. Reasons specified, in variable pf15_x_ot.  
	*/
tab pf15_c
g height_diff_ab = abs(pf15_a - pf15_b)
tab height_diff_ab
count if height_diff_ab >= 0.7 & height_diff_ab != .
	/* The number of non-missing values in pf15_c is the exact same as the number
	   of cases in which the absolute difference between pf15_a and pf15_b is 
	   more than 0.7 cm. Hence, the 3rd measurement has been taken in all cases where
	   the difference between the first two is more than 0.7.
	*/

**Examining missing values in both height and weight**
count if pf14_b == . & pf14_a != .
count if pf15_b == . & pf15_a != .
count if pf14_a == . & pf15_a != .
count if pf15_a == . & pf14_a != .
	/*
		0 cases where there is height, but no weight measurement.
		1 case where there is weight, but no height measurement.
	*/

**WEIGHT -- Combine the different readings into a single measure**	
g weight_diff_bc = abs(pf14_b - pf14_c)
g weight_diff_ac = abs(pf14_a - pf14_c) 
egen least_weight_diff = rowmin(weight_diff_ab weight_diff_bc weight_diff_ac) 
egen weight_ab = rowmean(pf14_a pf14_b)
egen weight_bc = rowmean(pf14_b pf14_c)
egen weight_ac = rowmean(pf14_a pf14_c)
g weight = .
replace weight = weight_ab if least_weight_diff == weight_diff_ab
replace weight = weight_bc if ((least_weight_diff == weight_diff_bc) & (pf14_c != .)) 
replace weight = weight_ac if ((least_weight_diff == weight_diff_ac) & (pf14_c != .))
drop *weight_diff* weight_*

**HEIGHT -- Combine the different readings into a single measure**	
g height_diff_bc = abs(pf15_b - pf15_c)
g height_diff_ac = abs(pf15_a - pf15_c) 
egen least_height_diff = rowmin(height_diff_ab height_diff_bc height_diff_ac) 
egen height_ab = rowmean(pf15_a pf15_b)
egen height_bc = rowmean(pf15_b pf15_c)
egen height_ac = rowmean(pf15_a pf15_c)
g height = .
replace height = height_ab if least_height_diff == height_diff_ab
replace height = height_bc if ((least_height_diff == height_diff_bc) & (pf15_c != .)) 
replace height = height_ac if ((least_height_diff == height_diff_ac) & (pf15_c != .))
drop *height_diff* height_*

**Age of the infant**
desc sk02*
tab sk02_x	
tab sk02_mth
tab sk02_yr		//No missing values
g sk02_day = 15
g date_birth = mdy(sk02_mth, sk02_day, sk02_yr)
format date_birth %d
format date_survey %d
g days_between = date_survey-date_birth
g age_in_months = round(days_between/30) 
drop days_between

**Sex of the infant**
tab sk05
count if sk05 == . & sk03 == 1	
tab sk03
	//44 missing values, all still-births
ren sk05 sex_of_infant

**Z-score: Height-for-age**
egen haz = zanthro(height, ha, WHO), xvar(age_in_months) gender(sex_of_infant) gencode(male=1, female=3) ageunit(month)
	//5860 height measurements, of which 40 are 5+ standard deviations away from the mean and dropped from the analysis
		
**Z-score: Weight-for-age**
egen waz = zanthro(weight, wa, WHO), xvar(age_in_months) gender(sex_of_infant) gencode(male=1, female=3) ageunit(month)
	//5861 weight measurements, of which 22 are 5+ standard deviations away from the mean and dropped from the analysis
	
	
**Creating outcomes**
g stunted = (haz < -2)
replace stunted = . if haz == .
g missing_stunted = (stunted == .)
bysort treatment: tab missing_stunted

g underweight = (waz < -2)
replace underweight = . if waz == .
g missing_underweight = (underweight == .)
bysort treatment: tab missing_underweigh
	
********************************************************************************

* PREPARING PRIMARY OUTCOMES - RQ. 4

***Participation - with KS07***
desc ks03 ks07 ks09_a ks09_b ks09_c ks09_d ks09_e ks09_f ks09_g
local participation ks03 ks07 ks09_a ks09_b ks09_c ks09_d ks09_e ks09_f ks09_g
foreach var of local participation {
	tab `var'
	replace `var' = . if (`var' == 97 | `var' == 98 | `var' == 99)
}

g participation_2 = (ks07 != 1)
replace participation_2 = . if ks07 == .

local participation_dummy ks03 ks09_a ks09_b ks09_c ks09_d ks09_e ks09_f ks09_g
foreach var of local participation_dummy {
	g `var'_dummy = (`var' == 1)
	replace `var'_dummy = . if `var' == .
}

rename ks03_dummy participation_1

g participation_3 = (ks09_a_dummy == 1 | ks09_b_dummy == 1 | ks09_c_dummy == 1 | ///
	ks09_d_dummy == 1 | ks09_e_dummy == 1 | ks09_f_dummy == 1 | ks09_g_dummy == 1)
replace participation_3 = . if (ks09_a_dummy == . | ks09_b_dummy == . | ks09_c_dummy == . | ///
	ks09_d_dummy == . | ks09_e_dummy == . | ks09_f_dummy == . | ks09_g_dummy == .)
	

*Creating index of participation
	
	*1. All variables are already oriented so that the higher value (1) represents the "better" outcome
	
	*3. Impute missing values at treatment assignment group mean
	local index_participation participation_1 participation_2 participation_3
	foreach var of local index_participation {
		bysort treatment: egen mean_`var' = mean(`var')
		replace `var' = mean_`var' if `var' == .
	}
	drop mean_participation_* 
	
	*2. Standardize each outcome - using the mean and s.d. of the control group
	sum participation_1 if treatment == 0
	scalar mean_1 = r(mean)
	scalar sd_1 = r(sd)
	g z_participation_1 = (participation_1 - mean_1)/sd_1
	
	sum participation_2 if treatment == 0
	scalar mean_2 = r(mean)
	scalar sd_2 = r(sd)
	g z_participation_2 = (participation_2 - mean_2)/sd_2
	
	sum participation_3 if treatment == 0
	scalar mean_3 = r(mean)
	scalar sd_3 = r(sd)
	g z_participation_3 = (participation_3 - mean_3)/sd_3
	
	*4. Compile summary index
	egen participation = rowmean(z_participation_1 z_participation_2 z_participation_3) 
	

***Empowerment vignettes***
desc ks11 ks12*
foreach var in ks11 ks12a ks12b ks12c {
	tab `var'
	tab treatment, sum(`var')
	tab lk01_cd, sum(`var')
}

lab drop KS11_EN
replace ks11 = 5 - ks11	//Reorienting it so that higher values represent "better" outcomes
		
********************************************************************************

* RESULTS - TABLE

cd "$analysis"

***Label variables***
lab var skilled_provider_birth "Birth with a skilled provider"
lab var facility_birth "Birth at a facility"
lab var postpartum_postnatal_care "Postnatal care"
lab var z_content "Content of care"
lab var stunted "Stunting"
lab var underweight "Underweight"
lab var participation "Empowerment - Participation"
lab var ks11 "Empowerment - Vignette"

//Create the table shell
putexcel set "primary_outcome.xlsx", sheet("Primary Outcomes") replace

putexcel A1=("Outcome") B1=("Treatment Mean") C1=("Control Mean") D1=("Impact") ///
E1=("p-value") F1=("Effect Size") G1=("Sample Size") A11=("Number of Respondents") ///
A12=("Number of villages") B11=(3016) C11=(2985) B12=(100) C12=(100) ///
A14=("Treatment means are regression adjusted") A15=("*** p<0.01, ** p<0.05, * p<0.1")

//Export variable label, regression coefficient, control mean, treatment mean, p-value, sample size
//Effect sizes are exported separately (see below)
local outcomes skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11 participation z_content 
local row = 2
foreach var of local outcomes {
	sleep 2000
	local varlabel : var label `var'
	
	qui reg `var' treatment strata1 strata2 strata3, vce(cluster iddesa)
	
	local p = 2*ttail(e(df_r), abs(_b[treatment]/_se[treatment]))
	local stars
		if `p' < 0.10 local stars = "*" 
		if `p' < 0.05 local stars = "**" 
		if `p' < 0.01 local stars = "***"
	local impact = _b[treatment]
	
	qui sum `var' if treatment == 0
	putexcel A`row' = ("`varlabel'")
	putexcel B`row' = ((_b[treatment]) + (r(mean)))
	putexcel C`row' = (r(mean))
	putexcel D`row' = ("`impact'" + "`stars'")
	putexcel E`row' = (`p')
	putexcel G`row' = (e(N))
	local ++row
}

//Export effect sizes for outcomes that aren't already standardized
local simple_outcomes skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11
local row = 2
foreach var of local simple_outcomes {
	qui reg `var' treatment strata1 strata2 strata3, vce(cluster iddesa)
	qui sum `var' if treatment == 0
	putexcel F`row' = (_b[treatment]/r(sd))
	local ++row
}

//Export effect sizes for outcomes that are already standardized
local std_outcomes participation z_content 
local row = 8
foreach var of local std_outcomes {
	qui reg `var' treatment strata1 strata2 strata3, vce(cluster iddesa)
	putexcel F`row' = (_b[treatment])
	local ++row
}

********************************************************************************

* RESULTS - GRAPH

	/*
	
	A total of 8 primary outcomes are analyzed above. 
	
	1. Six of them are not standardized.
	skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11
	
	2. Two of them are standardized
	participation z_content
	
	*/

***Non-standardized outcomes***

local outcomes skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11 
foreach var of local outcomes {
	
	*Dividing by standard deviation of control group, to create a transformed outcome
	qui sum `var' if treatment == 0
	local sd_`var' = r(sd)
	g `var'_sd = `var'/r(sd)
	
	*Running regression on the transformed outcome
	qui reg `var'_sd treatment strata1 strata2 strata3, vce(cluster iddesa)
	estimates store e_`var'
}

***Standardized outcomes***
	
local outcomes participation z_content
foreach var of local outcomes {
	qui reg `var' treatment strata1 strata2 strata3, vce(cluster iddesa)
	estimates store e_`var'
}	

***Coefficient plots***

*X-axis: -0.4 to 0.4
#delimit ;
coefplot (e_skilled_provider_birth, aseq("Birth with a skilled provider") 
		\ e_facility_birth, aseq("Birth at a facility")
		\ e_postpartum_postnatal_care, aseq("Postnatal care")
		\ e_z_content, aseq("Content of care")
		\ e_stunted, aseq("Stunting")
		\ e_underweight, aseq("Underweight")
		\ e_participation, aseq("Empowerment - Participation")
		\ e_ks11, aseq("Empowerment - Vignette"))
	, keep(treatment) xline(0) swapnames title("Indonesia", size(medium))
	  note("Notes: Displaying 95% confidence intervals" "Higher values represent 'better' outcomes, except for stunting and underweight", size(vsmall)) 
	  xtitle("Effect Size (Standard Deviations)", size(small))
	  xlabel(-0.4(0.1)0.4) scheme(s2mono)
;	

#delimit cr

graph export "primary_outcomes_effect_sizes.png", as(png) replace

********************************************************************************

* ROBUSTNESS CHECK - ADDING BASELINE OUTCOME TO REGRESSION SPECIFICATION

merge m:1 iddesa using "$data/baseline_outcomes_by_village.dta"
drop _merge

***Label variables***
lab var skilled_provider_birth "Birth with a skilled provider"
lab var facility_birth "Birth at a facility"
lab var postpartum_postnatal_care "Postnatal care"
lab var z_content "Content of care"
lab var stunted "Stunting"
lab var underweight "Underweight"
lab var participation "Empowerment - Participation"
lab var ks11 "Empowerment - Vignette"

local primary skilled_provider_birth facility_birth postpartum_postnatal_care z_content stunted underweight participation ks11
foreach var of local primary {
	di "`var'"
	reg `var' treatment strata1 strata2 strata3 m_bl_`var', vce(cluster iddesa)
	outreg2 using "primary_outcomes_with_baseline.xls", append label keep(treatment m_bl_`var')
}

cap erase "primary_outcomes_with_baseline.txt"

********************************************************************************

* POWER CALCULATIONS

	*For primary outcomes: What effect size is the analysis powered to detect?

bysort iddesa: g cluster_size = _N

	//Create the table shell
putexcel set "primary_outcome_mde.xlsx", sheet("MDE") replace
putexcel A1 = ("Outcome") B1=("Endline Mean") C1=("Detectable Difference") D1=("Detectable Difference (Std. Dev.)") 

	//Cluster size: Coefficient of variation
sum cluster_size
local cv = r(sd)/r(mean)

	//Calculate the detectable difference
local row = 2	
local outcomes skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11 participation z_content 
foreach var of local outcomes  {
	
	local varlabel : var label `var'
	
	loneway `var' iddesa
	local rho = r(rho)
	
	sum  `var' 
	local mean_`var'= r(mean)
	local sd_`var' = r(sd)	
	
	sum `var' if treatment == 0
	local c_`var' = r(sd)
	
	clustersampsi, detectabledifference mu1(`mean_`var'') m(30) k(100) rho(`rho') sd1(`sd_`var'') size_cv(`cv')
	local dd_`var' = (r(DD))
	local dd_`var'2 = `dd_`var''/`c_`var''
	
	putexcel A`row' = ("`varlabel'") 
	putexcel B`row' = ("`mean_`var''")
	putexcel C`row' = ("`dd_`var''")	
	putexcel D`row' = ("`dd_`var'2'")
	local ++row
	
}

	//For outcomes that were previously already standardized: Column D (in std. dev. units) is irrelevant
putexcel D8 = (" ")
putexcel D9 = (" ") 

********************************************************************************


/* LIST OF SUB-GROUP CATEGORIES AND THE ASSOCIATED VARIABLES
	
	Note that the following codes apply:
		V. Other, specify _________
		X. REFUSED
		Y. DON'T KNOW
		
		95. Other, specify _________
		97. REFUSED
		98. DON'T KNOW
		99. MISSING
	
	1. Province - lk01_cd
	
	2. Time of birth - age_in_months (this variable was created in 9_a_hh)
	
	3. Facility characteristics (from baseline)
	
	
*/


* PREPARING VARIABLES FOR SUB-GROUP ANALYSIS, CATEGORIES 1 & 2

***Category 1: Province***
tab lk01_cd
g banten = (lk01_cd == "36")
g south_sulawesi = (lk01_cd == "73")
g indicator_banten_treatment = banten*treatment
tab indicator_banten_treatment

***Category 2: Time of birth***
tab age_in_months
g younger_birth = (age_in_months <= 6)
g indicator_younger_treatment = younger_birth*treatment
tab indicator_younger_treatment

***Label variables***
lab var skilled_provider_birth "Birth with a skilled provider"
lab var facility_birth "Birth at a facility"
lab var postpartum_postnatal_care "Postnatal care"
lab var z_content "Content of care"
lab var stunted "Stunting"
lab var underweight "Underweight"
lab var participation "Empowerment - Participation"
lab var ks11 "Empowerment - Vignette"

********************************************************************************

* GRAPH - CATEGORY 1 (PROVINCE)

drop skilled_provider_birth_sd-_est_e_ks11

	/*
	
	A total of 8 primary outcomes are analyzed above. 
	
	1. Six of them are not standardized.
	skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11
	
	2. Two of them are standardized
	participation z_content
	
	*/

***Non-standardized outcomes***

local outcomes skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11 
foreach var of local outcomes {
	
	*Dividing by standard deviation of control group, to create a transformed outcome
	qui sum `var' if treatment == 0
	local sd_`var' = r(sd)
	g `var'_sd = `var'/r(sd)
	
	*Running regression on the transformed outcome
	qui reg `var'_sd treatment banten indicator_banten_treatment strata1 strata2 strata3, vce(cluster iddesa)
	estimates store e_`var'
}

***Standardized outcomes***
	
local outcomes participation z_content
foreach var of local outcomes {
	qui reg `var' treatment banten indicator_banten_treatment strata1 strata2 strata3, vce(cluster iddesa)
	estimates store e_`var'
}	

***Coefficient plots***

*X-axis: -0.4 to 0.4
#delimit ;
coefplot (e_skilled_provider_birth, aseq("Birth with a skilled provider") 
		\ e_facility_birth, aseq("Birth at a facility")
		\ e_postpartum_postnatal_care, aseq("Postnatal care")
		\ e_z_content, aseq("Content of care")
		\ e_stunted, aseq("Stunting")
		\ e_underweight, aseq("Underweight")
		\ e_participation, aseq("Empowerment - Participation")
		\ e_ks11, aseq("Empowerment - Vignette"))
	, keep(indicator_banten_treatment) xline(0) swapnames title("Indonesia", size(medium))
	  note("Notes: Displaying 95% confidence intervals" "Higher values represent 'better' outcomes, except for stunting and underweight", size(vsmall)) 
	  xtitle("Interaction: Treatment*Banten Province (Standard Deviations)", size(small))
	  xlabel(-0.4(0.1)0.4) scheme(s2mono)
;	

#delimit cr

graph export "$analysis/subgroup_analysis_province.png", as(png) replace

********************************************************************************

* GRAPH - CATEGORY 2 (TIME OF BIRTH)

drop skilled_provider_birth_sd-_est_e_ks11

	/*
	
	A total of 8 primary outcomes are analyzed above. 
	
	1. Six of them are not standardized.
	skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11
	
	2. Two of them are standardized
	participation z_content
	
	*/

***Non-standardized outcomes***

local outcomes skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11 
foreach var of local outcomes {
	
	*Dividing by standard deviation of control group, to create a transformed outcome
	qui sum `var' if treatment == 0
	local sd_`var' = r(sd)
	g `var'_sd = `var'/r(sd)
	
	*Running regression on the transformed outcome
	qui reg `var'_sd treatment younger_birth indicator_younger_treatment strata1 strata2 strata3, vce(cluster iddesa)
	estimates store e_`var'
}

***Standardized outcomes***
	
local outcomes participation z_content
foreach var of local outcomes {
	qui reg `var' treatment younger_birth indicator_younger_treatment strata1 strata2 strata3, vce(cluster iddesa)
	estimates store e_`var'
}	

***Coefficient plots***

*X-axis: -0.4 to 0.4
#delimit ;
coefplot (e_skilled_provider_birth, aseq("Birth with a skilled provider") 
		\ e_facility_birth, aseq("Birth at a facility")
		\ e_postpartum_postnatal_care, aseq("Postnatal care")
		\ e_z_content, aseq("Content of care")
		\ e_stunted, aseq("Stunting")
		\ e_underweight, aseq("Underweight")
		\ e_participation, aseq("Empowerment - Participation")
		\ e_ks11, aseq("Empowerment - Vignette"))
	, keep(indicator_younger_treatment) xline(0) swapnames title("Indonesia", size(medium))
	  note("Notes: Displaying 95% confidence intervals" "Higher values represent 'better' outcomes, except for stunting and underweight", size(vsmall)) 
	  xtitle("Interaction: Treatment*Earlier birth (Standard Deviations)", size(small))
	  xlabel(-0.4(0.1)0.4) scheme(s2mono)
;	

#delimit cr

graph export "$analysis/subgroup_analysis_time_of_birth.png", as(png) replace

********************************************************************************

* CATEGORY 3 (BASELINE FACILITY QUALITY)

drop skilled_provider_birth_sd-_est_e_ks11

merge m:1 iddesa using "$data/Baseline_facility_quality.dta"
drop _merge

	//Health service capacity
g high_capacity = (capacity_a >= 9 & capacity_a <= 12)
g low_capacity = (capacity_a >= 0 & capacity_a <= 4)
g medium_capacity = (capacity_a >= 5 & capacity_a <= 8)

g treatment_high_capacity = treatment*high_capacity
g treatment_low_capacity = treatment*low_capacity
g treatment_medium_capacity = treatment*medium_capacity

	//Label variables
lab var treatment_high_capacity "Treatment*High quality"
lab var treatment_low_capacity "Treatment*Low quality" 
lab var treatment_medium_capacity "Treatment*Medium quality"

local outcomes skilled_provider_birth facility_birth postpartum_postnatal_care stunted underweight ks11 
foreach var of local outcomes {
	
	*Dividing by standard deviation of control group, to create a transformed outcome
	qui sum `var' if treatment == 0
	local sd_`var' = r(sd)
	g `var'_sd = `var'/r(sd)
	
	*Running regression on the transformed outcome
	qui reg `var'_sd treatment high_capacity medium_capacity treatment_high_capacity treatment_medium_capacity strata1 strata2 strata3, vce(cluster iddesa)
	estimates store e_`var'
}
	
local outcomes participation z_content
foreach var of local outcomes {
	qui reg `var' treatment high_capacity medium_capacity treatment_high_capacity treatment_medium_capacity strata1 strata2 strata3, vce(cluster iddesa)
	estimates store e_`var'
}	

#delimit ;
coefplot (e_skilled_provider_birth, aseq("Birth with a skilled provider") 
		\ e_facility_birth, aseq("Birth at a facility")
		\ e_postpartum_postnatal_care, aseq("Postnatal care")
		\ e_z_content, aseq("Content of care")
		\ e_stunted, aseq("Stunting")
		\ e_underweight, aseq("Underweight")
		\ e_participation, aseq("Empowerment - Participation")
		\ e_ks11, aseq("Empowerment - Vignette"))
	, keep(treatment_high_capacity) xline(0) swapnames title("Indonesia: Health service capacity", size(medium))
	  note("Notes: Displaying 95% confidence intervals" "Higher values represent 'better' outcomes, except for stunting and underweight", size(vsmall)) 
	  xtitle("Interaction: Treatment*High quality facility (Standard Deviations)", size(small))
	  xlabel(-1(0.2)1) scheme(s2mono)
;	

#delimit cr

graph export "$analysis/subgroup_analysis_provider_capacity_high.png", as(png) replace

#delimit ;
coefplot (e_skilled_provider_birth, aseq("Birth with a skilled provider") 
		\ e_facility_birth, aseq("Birth at a facility")
		\ e_postpartum_postnatal_care, aseq("Postnatal care")
		\ e_z_content, aseq("Content of care")
		\ e_stunted, aseq("Stunting")
		\ e_underweight, aseq("Underweight")
		\ e_participation, aseq("Empowerment - Participation")
		\ e_ks11, aseq("Empowerment - Vignette"))
	, keep(treatment_medium_capacity) xline(0) swapnames title("Indonesia: Health service capacity", size(medium))
	  note("Notes: Displaying 95% confidence intervals" "Higher values represent 'better' outcomes, except for stunting and underweight", size(vsmall)) 
	  xtitle("Interaction: Treatment*Medium quality facility (Standard Deviations)", size(small))
	  xlabel(-1(0.2)1) scheme(s2mono)
;	

#delimit cr

graph export "$analysis/subgroup_analysis_provider_capacity_medium.png", as(png) replace

********************************************************************************

clear


		
